# Comparing experiment 1 and experiment 2 data
# Rachel Bernhard
# 4-6-2020

# Clear the environment: remove every object that ls() lists in the
# environment this script is evaluated in (normally the global environment).
# By default ls() does not list names beginning with ".", so those survive;
# attached packages and options() are not affected either.
rm(list = ls())

#load libraries
library(pacman)
p_load(tidyverse)

# Read the cleaned data for each experiment. Text columns are kept as
# character vectors rather than being converted to factors.
exp1 <- read.csv(
  file = "01-Experiment-1/data/04-clean-data/clean_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
exp2 <- read.csv(
  file = "02-Experiment-2/data/04-clean-data/clean_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Rename column 9 of the experiment 2 data to "texter.id" so that it matches
# the column name used in experiment 1.
# NOTE(review): the rename is positional; column 9 is expected to be
# "texter_id" -- confirm this if the layout of the CSV ever changes.
names(exp2)[9] <- "texter.id"

# Shift the experiment 2 texter IDs by the largest experiment 1 texter ID so
# the two studies can be told apart after pooling.
# NOTE(review): this only guarantees non-overlapping IDs if the experiment 2
# IDs are positive -- confirm.
# na.rm = TRUE: without it a single missing ID in experiment 1 makes the
# maximum NA, which would silently turn every experiment 2 ID into NA.
max.texter.id <- max(exp1$texter.id, na.rm = TRUE)

# Stop here if there is no usable maximum (no rows, all IDs missing, or
# non-numeric IDs) instead of carrying meaningless IDs into the pooled data.
stopifnot(is.finite(max.texter.id))

exp2$texter.id <- exp2$texter.id + max.texter.id

# Experiment indicator used for fixed effects:
# 1 = row comes from experiment 1, 0 = row comes from experiment 2.
exp1[["experiment1"]] <- 1
exp2[["experiment1"]] <- 0

# Give the offensiveness column the same name, "offensive", in both data
# sets. The columns are addressed by position: column 9 in experiment 1 and
# column 28 in experiment 2.
colnames(exp1)[9] <- "offensive"
colnames(exp2)[28] <- "offensive"

# Columns carried over from each experiment into the pooled data set.
keep.vars <- c(
  "offensive",
  "male", "female", "gen.neutral", "no.name",
  "male.instrument", "female.instrument",
  "gen.neutral.instrument", "no.name.instrument",
  "gender", "texter.id", "texter.gender",
  "experiment1", "responded"
)

# Pool the two experiments: restrict each to keep.vars (in that order) and
# stack the rows, experiment 1 first.
dt <- rbind(
  exp1[, keep.vars, drop = FALSE],
  exp2[, keep.vars, drop = FALSE]
)


# Row masks for the two experiments (experiment1 is 1 for experiment 1 rows
# and 0 for experiment 2 rows).
from.exp1 <- dt$experiment1 == 1
from.exp2 <- dt$experiment1 == 0

# 1st attempt at recoding -- anything where either coder marks as somewhat
# offensive becomes a 100.
# Experiment 1 rows keep their offensive value unchanged; experiment 2 rows
# become 0 when the score is exactly 0 and 100 otherwise. A missing
# experiment 2 score stays NA.
dt$offensive_binary <- ifelse(
  from.exp1,
  dt$offensive,
  ifelse(from.exp2 & dt$offensive == 0, 0, 100)
)

### Robustness coding
# For each cutoff (50, 40, 30, 20) create offensive_<cutoff>:
#   - experiment 1 rows keep their offensive value unchanged;
#   - experiment 2 rows scoring below the cutoff are coded 0 (non-offensive);
#   - experiment 2 rows scoring at or above the cutoff are coded 100
#     (offensive) -- note that a score equal to the cutoff counts as
#     offensive;
#   - a missing experiment 2 score stays NA.
for (cutoff in c(50, 40, 30, 20)) {
  dt[[paste0("offensive_", cutoff)]] <- ifelse(
    from.exp1,
    dt$offensive,
    ifelse(from.exp2 & dt$offensive < cutoff, 0, 100)
  )
}

# Save the pooled data. row.names = TRUE is write.csv's default; it is spelled
# out here because it adds a leading column of row names (with an empty
# header) to the file.
write.csv(
  x = dt,
  file = "06-Pooled-Offensiveness/data/clean_data.csv",
  row.names = TRUE
)

